Redshift SQL Tricks

1) adding column indicating if row is first row

select c1,
       floor(1 / (row_number() over (partition BY c1 order by c2))) as is_first_row
from t

2) Pivot row into column

select pk,
       max(case  when c1 = 1 then c2 
                 else '#N/A'
           end) as c3,
       max(case  when c1 = 2 then c2 
                 else '#N/A'
           end) as c4,
from t
group by pk

3) pivoting column into row

select c2
from t
union
select c1
from t

4) adding column indicating if row is first row

join ON table with multiple primary_key (if one didnt create gk) and they are the same you dont lead to connect o account so you create prefix

coalesce('u' + t1.pk1::varchar(255),'s' + t1.pk2::varchar(255),'a' + t1.pk3::varchar(255)) =
coalesce('u' + t2.pk1::varchar(255),'s' + t2.pk2::varchar(255),'a' + t2.pk3::varchar(255))

5) check duplication on tables

select pk, 
       count(1)
from t 
group by pk 
having count(1)>1

6) convert timestamp to int

select EXTRACT('EPOCH' FROM c1)
from t

7) get rows which are not connected in left join

select *
   from t1
   left join t2 
        on t2.pk = t1.pk
   whenere t2.pk is null

7) profile over facts using sparsed matrix

when one construct profile table t1 from t2,t3,t4 facts the cleanest way to so is to insert into t1_temp the aggrigations over facts but then the granularity will stop be the pk of t1 because of sparisity so one need to group it all together

insert into t1_temp 
from <query over t2>;

insert into t1 
from <query over t3>;

insert into t1 
from <query over t4>;

insert into t1
from t1_temp
group by pk1;

8) exact duplication row_number

10) ri

good when pk is in the fact before in the dimention


In [ ]:
```sql
create table t3_que as
select c1
from t1_fact
left join t2_dim
     on t1_fact.pk = t2_dim.pk
where t2_dim is null
```

11) get value exactly after the last character _

select REVERSE(SPLIT_PART(REVERSE(c1),'_',1)) 
from t

12) checking wheter someone had 72 hour to of buisness hours

  • sunday/saturday he gets until end of monday
  • friday he gets 72 hours
  • otherwise he gets 24 hours
select c1,
       c2
from t1
where ((date_part(dow, c1)=6 AND datediff(day, c1, c2)<=2) OR
       (date_part(dow, c1)=0 AND datediff(day, c1, c2)<=1) OR
       (date_part(dow, c1)=5 AND datediff(hour, c1, c2) <= 72) OR
       (date_part(dow, c1) in (1,2,3,4) AND datediff(hour,c1, c2)<=24

13) constructing table over time from event history table

select  c1
        c2 AS from_time,
        isnull(LEAD(c2) OVER (PARTITION BY c1 ORDER BY c2 ), '2999-01-01')  AS to_time
from  t1
where event_name = 'x'
  • which allow you to allow you to do:
    • where t2.c1 BETWEEN t1.from_time AND t1.to_time

14) interval to window function using cross join(one can do changing interval using 2 cross join intervals)

create temp table exploded as
select row_number() over (order by true) as day
from t1 limit 10;


select exploded.day as day, 
       count(distinct c1) 
from t2
cross join exploded
where getdate() > t2.c2
      and getdate()< DATEADD('day',t2.c2)
group by day

14) generate unique gks

(select isnull(max(gk),0) from t) + row_number() over (order by 1)  as gk

15) flip flop

16) # of random rows

select * from table order by random() limit 1000;

17) % of random rows

select * from table where random() < 0.01;